# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
OpenCl backend base interface for fast Fourier Transforms.
:class:`~hysop.numerics.opencl_fft.OpenClFFTI`
:class:`~hysop.numerics.opencl_fft.OpenClFFTPlanI`
:class:`~hysop.numerics.opencl_fft.OpenClFFTQueue`
"""
from abc import abstractmethod
from hysop.tools.htypes import first_not_None, check_instance
from hysop.numerics.fft.fft import FFTQueueI, FFTPlanI, FFTI
from hysop.symbolic.relational import Assignment
from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend
from hysop.backend.device.opencl.opencl_array import OpenClArray
from hysop.backend.device.opencl.opencl_elementwise import (
OpenClElementwiseKernelGenerator,
)
from hysop.backend.device.opencl.opencl_kernel_launcher import (
OpenClKernelListLauncher,
OpenClKernelLauncherI,
)
from hysop.backend.device.opencl.opencl_copy_kernel_launchers import (
OpenClCopyBufferRectLauncher,
OpenClFillKernelLauncher,
)
from hysop.backend.device.opencl.opencl_kernel_launcher import OpenClKernelListLauncher
from hysop.backend.device.opencl.autotunable_kernels.transpose import (
OpenClAutotunableTransposeKernel,
)
[docs]
class OpenClFFTQueue(FFTQueueI):
"""An opencl fft queue is just like a standard opencl queue."""
def __init__(self, queue, name="fft_planner"):
self._launcher = OpenClKernelListLauncher(name=name)
self._queue = queue
def __iadd__(self, kernel):
self._launcher += kernel
return self
[docs]
def execute(self, wait_for=None):
return self._launcher(queue=self._queue, wait_for=wait_for)
[docs]
class OpenClFFTPlanI(FFTPlanI, OpenClKernelLauncherI):
"""
Tag for FFT plans executing on OpenCL backend arrays.
"""
def __init__(self, **kwds):
super().__init__(**kwds)
self._name = "fft_plan"
[docs]
@abstractmethod
def execute(self, **kwds):
pass
def __call__(self, **kwds):
return self.execute(**kwds)
[docs]
class OpenClFFTI(FFTI):
"""
Abstract base for FFT interfaces targetting OpenCL backends.
"""
def __init__(self, cl_env, backend=None, allocator=None, queue=None, **kwds):
from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend
from hysop.backend.device.opencl.opencl_env import OpenClEnvironment
if backend is None:
if queue is None:
queue = cl_env.default_queue
backend = OpenClArrayBackend.get_or_create(
cl_env=cl_env, queue=queue, allocator=allocator
)
else:
msg = "OpenCl environment does not match the one of the backend."
assert backend.cl_env is cl_env, msg
if allocator is not None:
msg = "OpenCl allocator does not match the one of the backend."
assert backend.allocator is allocator, msg
if queue.context != cl_env.context:
msg = "Queue does not match context:"
msg += f"\n *Given context is {cl_env.context}."
msg += f"\n *Command queue context is {queue.context}."
raise ValueError(msg)
check_instance(cl_env, OpenClEnvironment)
check_instance(backend, OpenClArrayBackend)
super().__init__(backend=backend, **kwds)
self.cl_env = cl_env
self.queue = queue
self.kernel_generator = OpenClElementwiseKernelGenerator(cl_env=cl_env)
[docs]
@classmethod
def default_interface(
cls,
cl_env,
backend=None,
allocator=None,
warn_on_allocation=False,
error_on_allocation=True,
warn_on_unaligned_output_offset=True,
**kwds,
):
"""Get the default OpenCl FFT interface which is a GpyFFT interface."""
from hysop.numerics.fft.gpyfft_fft import GpyFFT
return GpyFFT(
cl_env=cl_env,
backend=backend,
allocator=allocator,
warn_on_allocation=warn_on_allocation,
error_on_allocation=error_on_allocation,
warn_on_unaligned_output_offset=warn_on_unaligned_output_offset,
**kwds,
)
[docs]
def new_queue(self, tg, name):
return OpenClFFTQueue(queue=self.queue, name=name)
[docs]
def plan_copy(self, tg, src, dst):
src = self.ensure_buffer(src)
dst = self.ensure_buffer(dst)
launcher = OpenClCopyBufferRectLauncher.from_slices("copy", src=src, dst=dst)
return launcher
[docs]
def plan_accumulate(self, tg, src, dst):
src = self.ensure_buffer(src)
dst = self.ensure_buffer(dst)
src, dst = self.kernel_generator.arrays_to_symbols(src, dst)
expr = Assignment(dst, src + dst)
launcher, _ = self.kernel_generator.elementwise_kernel("accumulate", expr)
return launcher
[docs]
def plan_transpose(self, tg, src, dst, axes):
src = self.ensure_buffer(src)
dst = self.ensure_buffer(dst)
backend_kwds = {
"cl_env": tg.backend.cl_env,
"typegen": tg.op.typegen,
"autotuner_config": tg.op.autotuner_config,
"build_opts": tg.op.build_options(),
}
kernel = OpenClAutotunableTransposeKernel(**backend_kwds)
transpose, _ = kernel.autotune(
axes=axes,
hardcode_arrays=True,
is_inplace=False,
input_buffer=src,
output_buffer=dst,
)
return transpose.build_launcher(in_base=src.base_data, out_base=dst.base_data)
[docs]
def plan_fill_zeros(self, tg, a, slices):
if not slices:
return
a = self.ensure_buffer(a)
launcher = OpenClKernelListLauncher(name="fill_zeros")
for slc in slices:
lnc = OpenClFillKernelLauncher.from_slices(
varname="buffer", backend=tg.backend, dst=a[slc], fill_value=0
)
launcher += lnc
return launcher
[docs]
def plan_compute_energy(
self, tg, fshape, src, dst, transforms, mutexes, method="round", target=None
):
(N, NS2, C2C, R2C, S) = super().plan_compute_energy(
tg, fshape, src, dst, transforms
)
# We do not forget the hermitian symmetry:
# normally: E = 1/2 |Fi|**2
# but for a R2C transform: E = 1/2 |Fi|**2 + 1/2 |Fi*|**2 = 1 |Fi|**2
C = 2 ** (R2C - 1)
import numpy as np
import sympy as sm
from hysop.tools.numerics import is_complex, is_fp, complex_to_float_dtype
from hysop.symbolic import local_indices_symbols
from hysop.symbolic.relational import Assignment, LogicalGT, Round
from hysop.symbolic.misc import Cast, Select, CriticalCodeSection
from hysop.symbolic.complex import cabs2
dim = src.ndim
dtype = src.dtype
ftype = dtype if is_fp(dtype) else complex_to_float_dtype(dtype)
itype = np.int32
I = local_indices_symbols[:dim][::-1]
(src,) = self.kernel_generator.arrays_to_symbols(src)
dst, mutexes = self.kernel_generator.buffer_to_symbols(dst, mutexes)
(k,) = self.kernel_generator.symbolic_tmp_scalars("k", dtype=itype)
(E,) = self.kernel_generator.symbolic_tmp_scalars("E", dtype=ftype)
K = self.kernel_generator.symbolic_tmp_scalars("K", dtype=ftype, count=dim)
exprs = ()
assert len(K) == len(I) == len(N) == len(NS2) == len(C2C)
for Ki, Ii, Ni, Nis2, c2c in zip(K, I, N, NS2, C2C):
if c2c:
rhs = Select(Ii, Ni - Ii, LogicalGT(Ii, Nis2))
else:
rhs = Ii
e = Assignment(Ki, rhs)
exprs += (e,)
if dim == 1:
exprs += (Assignment(k, K[0]),)
else:
exprs += (Assignment(k, Cast(Round(sm.sqrt(np.dot(K, K))), itype)),)
exprs += (Assignment(E, C * S * S * cabs2(src)),)
action = Assignment(dst[k], dst[k] + E)
exprs += (CriticalCodeSection(action, mutexes, k),)
# this kernel is not optimized (we use a __global mutex for each wavenumber for now)
launcher, _ = self.kernel_generator.elementwise_kernel(
"compute_energy",
*exprs,
force_volatile=(dst,),
max_candidates=1,
debug=False,
)
return launcher
[docs]
@classmethod
def ensure_buffer(cls, get_buffer):
if callable(get_buffer):
buf = get_buffer()
else:
buf = get_buffer
return buf